How does choice of tag-matching metric influence adaptive evolution in broader contexts? We explore the implications of different tag-matching metrics in the context of SignalGP, a tag-based genetic programming (GP) representation.
We investigate the success of five tag-matching schemes — integer, integer-symmetric, hash, hamming, and streak — in the context of SignalGP on two diagnostic GP problems: the changing-signal task and the directional-signal task. The changing-signal task evaluates how well a GP representation can associate a set of distinct behavioral responses each with a particular environmental cue. The directional-signal task evaluates how well a representation facilitates signal-response plasticity (i.e., the ability to alter responses to repeated cues during the program’s lifetime).
For each task and tag-matching metric, we evolved 200 replicate populations and compared the performance of the most fit genotypes (at the end and during evolution). We evolved populations to solve the directional-signal task for 5000 generations and populations to solve the changing-signal task (a significantly easier task) for 100 generations.
We used results from a previous set of runs (2020-03-23-step-1) to select the best mutation rates to use for each metric on each task. We selected the tag mutation rate that produced the most successful replicates (i.e., replicates that produced a program capable of perfectly solving the task) for each task/metric combination. The idea here is to give each metric the best case scenario for measuring its performance. Exploratory experiments have shown that each metric differs in how it responds to mutation rate (some more sensitive than others).
Directional-signal task per-bit tag mutation rates:
Changing-signal task per-bit tag mutation rates:
Note that the mutation rates for the changing-signal task are higher (per metric) than those for the directional-signal task. This is likely because the changing-signal task requires fewer inter-program tag-matching relationships to be maintained over time.
Load required R packages.
library(tidyr)
library(ggplot2)
library(plyr)
library(dplyr)
library(cowplot)
# Configure our default graphing theme: cowplot's theme applied to all
# subsequent ggplot figures via theme_set().
theme_set(theme_cowplot())
These analyses were conducted in the following computing environment:
# Record the R version used to run these analyses (reproducibility record).
print(version)
## _
## platform x86_64-apple-darwin15.6.0
## arch x86_64
## os darwin15.6.0
## system x86_64, darwin15.6.0
## status
## major 3
## minor 6.2
## year 2019
## month 12
## day 12
## svn rev 77560
## language R
## version.string R version 3.6.2 (2019-12-12)
## nickname Dark and Stormy Night
Load the most performant organisms from each condition/replicate.
# Read a max-fit organisms CSV and prepare it for analysis: the tag-matching
# metric column becomes a factor with a fixed level ordering (so plots are
# ordered consistently across figures), and the per-bit tag mutation rate
# becomes a factor derived from its numeric value.
extract_max_fit <- function(data_path) {
  max_fit <- read.csv(data_path, na.strings="NONE")
  metric_levels <- c("hamming",
                     "integer",
                     "integer-symmetric",
                     "hash",
                     "streak",
                     "streak-exact")
  max_fit$matchbin_metric <- factor(max_fit$matchbin_metric,
                                    levels=metric_levels)
  max_fit$tag_mut_rate <- factor(as.numeric(max_fit$MUT_RATE__FUNC_TAG_BF))
  max_fit
}
# Experiment identifiers and output directory for generated figures.
exp_name <- "2020-03-23"
out_dir <- "2020-03-23-step-2"
# Directional-signal task (DST) max-fitness data.
dst_data_loc <- paste0("../data/", exp_name, "/step-two/dir-sig/max_fit_orgs.csv")
dst_data <- extract_max_fit(dst_data_loc)
# Changing-signal task (CST) max-fitness data.
cst_data_loc <- paste0("../data/", exp_name, "/step-two/chg-env/max_fit_orgs.csv")
cst_data <- extract_max_fit(cst_data_loc)
# Analysis-wide constants: replicates per condition, significance threshold,
# and the multiple-comparison correction method.
num_replicates <- 200
alpha <- 0.05
mult_comp_method <- "holm"
Load fitness over time data for each task.
# Fitness-over-time (fot) data for each task.
# NOTE(review): the two CSVs use different column names for the same
# quantities (dir-sig: tag_metric/tag_mut_rate; chg-env:
# matchbin_metric/MUT_RATE__FUNC_TAG_BF) — presumably an artifact of how
# each experiment logged its data; confirm against the data files.
fot_metric_levels <- c("hamming",
                       "integer",
                       "integer-symmetric",
                       "hash",
                       "streak",
                       "streak-exact")
dst_fot_data_loc <- paste0("../data/", exp_name, "/step-two/dir-sig/fot.csv")
dst_fot_data <- read.csv(dst_fot_data_loc, na.strings="NONE")
dst_fot_data$tag_metric <- factor(dst_fot_data$tag_metric,
                                  levels=fot_metric_levels)
dst_fot_data$tag_mut_rate <- factor(as.numeric(dst_fot_data$tag_mut_rate))
cst_fot_data_loc <- paste0("../data/", exp_name, "/step-two/chg-env/fot.csv")
cst_fot_data <- read.csv(cst_fot_data_loc, na.strings="NONE")
cst_fot_data$matchbin_metric <- factor(cst_fot_data$matchbin_metric,
                                       levels=fot_metric_levels)
cst_fot_data$tag_mut_rate <- factor(as.numeric(cst_fot_data$MUT_RATE__FUNC_TAG_BF))
# Generation cutoffs used for the DST solution-count plots below.
generation_cutoffs <- c(500, 1000, 3000, 5000)
Here, we’ll plot the number of solutions found before each of 500, 1000, 3000, 5000 generations.
exp_prefix <- "dst"
# Bar chart per cutoff: number of replicates whose max-fit program perfectly
# solved the DST ("solution" == 1) at or before generation `gen`.
for (gen in generation_cutoffs) {
  p <- ggplot(filter(dst_data, solution=="1" & update <= gen),
              aes(x=matchbin_metric, fill=matchbin_metric)) +
    geom_bar(stat="count", position="dodge") +
    geom_text(stat="count",
              mapping=aes(label=..count..),
              position=position_dodge(0.9), vjust=0) +
    ggtitle(paste0("DST solutions prior to generation ", gen)) +
    ylab("# successful replicates") +
    ylim(0, num_replicates + 2) +
    scale_x_discrete(name="Tag-matching metric",
                     limits=c("hamming",
                              "integer",
                              "integer-symmetric",
                              "hash",
                              "streak")) +
    theme(legend.position="none",
          axis.text.x=element_text(size=8))
  # Save explicitly with plot=p rather than chaining ggsave() onto the plot
  # with `+`, which relies on fragile/deprecated ggplot2 behavior (ggsave in
  # a `+` chain saves last_plot(), not necessarily the plot being built).
  ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-solutions-", gen, ".png"),
         plot=p, width=16, height=8)
  print(p)
}
Compare each metric’s success counts using Fisher’s exact test, with the Holm method to correct for multiple comparisons.
# Run Fisher's exact test comparing the success counts of two tag-matching
# metrics.
#
# Args:
#   data:     data frame with `matchbin_metric` and `solution` columns,
#             where solution == "1" marks a replicate that perfectly
#             solved the task.
#   metric_a: name of the first metric.
#   metric_b: name of the second metric.
#   n:        total number of replicates per metric.
#
# Returns: the `htest` object from fisher.test().
do_ft <- function(data, metric_a, metric_b, n) {
  # Count successes with base R (NA-safe via na.rm) instead of
  # nrow(dplyr::filter(...)), so the helper does not require dplyr.
  a_successes <- sum(data$matchbin_metric == metric_a & data$solution == "1",
                     na.rm=TRUE)
  b_successes <- sum(data$matchbin_metric == metric_b & data$solution == "1",
                     na.rm=TRUE)
  # Guard against an inconsistent n (would yield negative failure counts).
  stopifnot(a_successes <= n, b_successes <= n)
  # 2x2 contingency table: rows = metrics, columns = success/fail counts.
  table <- matrix(c(a_successes,
                    b_successes,
                    n - a_successes,
                    n - b_successes),
                  nrow=2,
                  dimnames=list(c(metric_a, metric_b),
                                c("success", "fail")))
  fisher.test(table)
}
metrics <- c("hamming", "hash", "streak", "integer", "integer-symmetric")
ft_results <- list()
ft_p_values <- list()
# Run Fisher's exact test for every unordered pair of metrics; combn()
# enumerates the same (i < k) pairs the nested-index loop would.
for (pair in combn(metrics, 2, simplify=FALSE)) {
  metric_a <- pair[[1]]
  metric_b <- pair[[2]]
  comp_name <- paste0(metric_a, "_vs_", metric_b)
  ft_results[[comp_name]] <- do_ft(dst_data, metric_a, metric_b, num_replicates)
  ft_p_values[[comp_name]] <- ft_results[[comp_name]]$p.value
}
# Correct the pairwise Fisher p values for multiple comparisons (Holm,
# via mult_comp_method) and report each, flagging those below alpha.
adjusted <- p.adjust(ft_p_values, method=mult_comp_method)
for (comp in names(adjusted)) {
  print(paste0("Comparison: ", comp))
  adjusted_p <- adjusted[[comp]]
  print(paste0("  adjusted p value: ", adjusted_p))
  if (adjusted_p < alpha) {
    print("  *significant")
  }
}
## [1] "Comparison: hamming_vs_hash"
## [1] " adjusted p value: 0.000807472586130674"
## [1] " *significant"
## [1] "Comparison: hamming_vs_streak"
## [1] " adjusted p value: 0.124195517379838"
## [1] "Comparison: hamming_vs_integer"
## [1] " adjusted p value: 2.64005446076842e-15"
## [1] " *significant"
## [1] "Comparison: hamming_vs_integer-symmetric"
## [1] " adjusted p value: 1.00283327099096e-17"
## [1] " *significant"
## [1] "Comparison: hash_vs_streak"
## [1] " adjusted p value: 1.39865218460957e-07"
## [1] " *significant"
## [1] "Comparison: hash_vs_integer"
## [1] " adjusted p value: 2.85294652241976e-05"
## [1] " *significant"
## [1] "Comparison: hash_vs_integer-symmetric"
## [1] " adjusted p value: 1.13267683804522e-06"
## [1] " *significant"
## [1] "Comparison: streak_vs_integer"
## [1] " adjusted p value: 1.19861616134406e-22"
## [1] " *significant"
## [1] "Comparison: streak_vs_integer-symmetric"
## [1] " adjusted p value: 1.51925638619314e-25"
## [1] " *significant"
## [1] "Comparison: integer_vs_integer-symmetric"
## [1] " adjusted p value: 0.558439901506227"
Looking at scores is a little more fine-grained than looking at solutions.
exp_prefix <- "dst"
# Aggregate score distributions per metric (finer-grained than counting
# solutions).
p <- ggplot(dst_data, aes(x=matchbin_metric, y=aggregate_score, fill=matchbin_metric)) +
  geom_boxplot() +
  ggtitle("Directional signal task - scores") +
  theme(axis.text.x=element_text(size=8))
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-scores.png"),
       plot=p, width=16, height=8)
print(p)
For this first set of plots, we look at all replicates. Replicates in which no solution evolved are assumed to have found a solution in the final generation.
exp_prefix <- "dst"
# Time to solution across all replicates (unsuccessful replicates are
# treated as solving in the final generation; see surrounding text).
p <- ggplot(dst_data, aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
  geom_boxplot() +
  geom_jitter() +
  ggtitle("Directional signal task - time to solution") +
  theme(axis.text.x=element_text(size=8))
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution.png"),
       plot=p, width=16, height=8)
print(p)
# Same data, shown as means with bootstrapped 95% confidence intervals.
# NOTE(review): stat_summary(fun.y=...) is deprecated in ggplot2 >= 3.3.0
# (use `fun=`); kept as-is for compatibility with the ggplot2 version used
# to render this document.
ggplot(dst_data, aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
  # geom_boxplot() +
  geom_jitter(alpha=0.75) +
  stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
  stat_summary(fun.y = mean, geom = "point", colour = "black") +
  ggtitle("Directional signal task - time to solution") +
  theme(axis.text.x=element_text(size=8))
A better way to look at time to solution is to pick a threshold, K, and look at only the first K replicates to produce a solution. Because at least 20% of replicates under every metric found a solution, we’ll use a 20% threshold (i.e., the first 40 replicates). For this to be a completely fair comparison, we should have selected this threshold a priori. We’ll treat these comparisons as exploratory, and we’ll run a second set of runs (experiments 2020-03-27) with a threshold selected a priori.
threshold <- 40
metrics <- c("hamming", "hash", "integer", "integer-symmetric", "streak")
# For each metric, rank its replicates by the generation at which a solution
# first appeared (ties broken randomly) so we can later compare only the
# first `threshold` replicates to solve the task.
# Build the combined frame in one pass with lapply() + do.call(rbind, ...)
# instead of growing a data frame with rbind() inside a loop (which copies
# the accumulator every iteration and relied on a seed frame whose
# placeholder column name, `update_rank`, did not match the `update_ranking`
# column actually produced).
dst_update_rankings <- do.call(
  rbind,
  lapply(metrics, function(metric) {
    dst_metric <- filter(dst_data, matchbin_metric == metric)
    dst_metric <- subset.data.frame(dst_metric,
                                    select=c("aggregate_score",
                                             "update",
                                             "matchbin_metric",
                                             "solution"))
    dst_metric$update <- as.numeric(dst_metric$update)
    dst_metric$update_ranking <- rank(dst_metric$update, ties.method="random")
    dst_metric
  })
)
exp_prefix <- "dst"
# Box plot: time to solution for only the first `threshold` replicates
# (per metric) to solve the DST.
p <- ggplot(filter(dst_update_rankings, update_ranking <= threshold),
            aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
  geom_jitter(aes(color=matchbin_metric)) +
  geom_boxplot(alpha=0.75) +
  ggtitle(paste0("DST - first ", threshold, " replicates to find a solution")) +
  theme(legend.position="none",
        axis.text.x=element_text(size=8)) +
  ylab("generations until solution")
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution-top-", threshold, "-bp.png"),
       plot=p)
print(p)
## Saving 7 x 5 in image
# Same subset, shown as means with bootstrapped 95% confidence intervals.
# NOTE(review): stat_summary(fun.y=...) is deprecated in ggplot2 >= 3.3.0
# (use `fun=`); kept for compatibility with the rendering environment.
p <- ggplot(filter(dst_update_rankings, update_ranking <= threshold),
            aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
  geom_jitter(alpha=0.75) +
  stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
  stat_summary(fun.y = mean, geom = "point", colour = "black") +
  ggtitle(paste0("DST - first ", threshold, " replicates to find a solution")) +
  ylab("generations until solution") +
  theme(legend.position="none",
        axis.text.x=element_text(size=8))
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution-top-", threshold, "-ci.png"),
       plot=p)
print(p)
## Saving 7 x 5 in image
Let’s check to see if the underlying distributions of time to solution are different across metrics using a Kruskal–Wallis test.
# Keep only the first `threshold` replicates (per metric) to solve the task.
dst_first_k_solutions <- filter(dst_update_rankings, update_ranking <= threshold)
# Kruskal-Wallis test: do the time-to-solution distributions differ across
# tag-matching metrics?
kruskal.test(formula = update ~ matchbin_metric, data=dst_first_k_solutions)
##
## Kruskal-Wallis rank sum test
##
## data: update by matchbin_metric
## Kruskal-Wallis chi-squared = 131.58, df = 4, p-value < 2.2e-16
Indeed, there is a difference. We can use a post-hoc Wilcoxon rank sum test to identify which distributions are different (with the Holm method to correct for multiple comparisons).
# Post-hoc pairwise Wilcoxon rank-sum tests (corrected with
# mult_comp_method, i.e., Holm) to identify which pairs of metrics differ
# in time to solution.
# exact=FALSE forces the normal approximation — presumably because ties in
# `update` make exact p values unavailable; confirm against the data.
pairwise.wilcox.test(x=dst_first_k_solutions$update,
                     g=dst_first_k_solutions$matchbin_metric,
                     p.adjust.method = mult_comp_method,
                     exact=FALSE,
                     conf.int=TRUE)
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: dst_first_k_solutions$update and dst_first_k_solutions$matchbin_metric
##
## hamming integer integer-symmetric hash
## integer 2.3e-11 - - -
## integer-symmetric 1.6e-11 0.25614 - -
## hash 1.9e-06 0.00019 2.4e-06 -
## streak 2.4e-06 4.3e-13 1.1e-12 2.9e-10
##
## P value adjustment method: holm
Only integer and integer-symmetric metrics do not have a significant difference.
updates <- c(10, 30, 50, 100, 300, 500, 1000, 3000, 5000)
# DST score distributions at selected generation snapshots.
plot_data <- filter(dst_fot_data, update %in% updates)
plot_data$update <- factor(plot_data$update)
p_box <- ggplot(plot_data, aes(x=update, y=score, fill=tag_metric)) +
  geom_boxplot() +
  ggtitle("DST - score over time")
# Save explicitly with plot= rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/dst_score_over_time_box.pdf"),
       plot=p_box, width=21, height=8)
print(p_box)
# Mean score trajectory over all generations with a bootstrapped 95% CI
# ribbon. NOTE(review): stat_summary(fun.y=...) is deprecated in
# ggplot2 >= 3.3.0 (use `fun=`); kept for compatibility.
p_line <- ggplot(dst_fot_data, aes(x=update, y=score, color=tag_metric, fill=tag_metric)) +
  stat_summary(geom = "line", fun.y = mean) +
  stat_summary(geom = "ribbon", fun.data = mean_cl_boot, fun.args=list(conf.int=0.95), alpha = 0.3, color=NA) +
  ggtitle("DST - score over time")
ggsave(paste0("./imgs/", out_dir, "/dst_score_over_time.pdf"),
       plot=p_line, width=21, height=10)
print(p_line)
We’ll do all the same analyses for the changing-signal task.
generation_cutoffs <- c(50, 100)
exp_prefix <- "cst"
# Bar chart per cutoff: number of replicates whose max-fit program perfectly
# solved the CST ("solution" == 1) at or before generation `gen`.
for (gen in generation_cutoffs) {
  p <- ggplot(filter(cst_data, solution=="1" & update <= gen),
              aes(x=matchbin_metric, fill=matchbin_metric)) +
    geom_bar(stat="count", position="dodge") +
    geom_text(stat="count",
              mapping=aes(label=..count..),
              position=position_dodge(0.9), vjust=0) +
    ggtitle(paste0("CST solutions prior to generation ", gen)) +
    ylab("# successful replicates") +
    ylim(0, num_replicates + 2) +
    scale_x_discrete(name="Tag-matching metric",
                     limits=c("hamming",
                              "integer",
                              "integer-symmetric",
                              "hash",
                              "streak")) +
    theme(legend.position="none",
          axis.text.x=element_text(size=8))
  # Save explicitly with plot=p rather than chaining ggsave() onto the plot
  # with `+` (fragile/deprecated ggplot2 behavior).
  ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-solutions-", gen, ".png"),
         plot=p, width=16, height=8)
  print(p)
}
Compare each metric’s success counts using Fisher’s exact test.
metrics <- c("hamming", "hash", "streak", "integer", "integer-symmetric")
ft_results <- list()
ft_p_values <- list()
# All pairwise Fisher's exact comparisons of CST success counts.
for (pair in combn(metrics, 2, simplify=FALSE)) {
  metric_a <- pair[[1]]
  metric_b <- pair[[2]]
  comp_name <- paste0(metric_a, "_vs_", metric_b)
  ft_results[[comp_name]] <- do_ft(cst_data, metric_a, metric_b, num_replicates)
  ft_p_values[[comp_name]] <- ft_results[[comp_name]]$p.value
}
# NOTE(review): this section adjusts with "bonferroni" while the DST
# analysis uses the declared mult_comp_method ("holm"). The rendered p
# values below were produced with Bonferroni, so the method is kept here;
# confirm whether Holm was intended for consistency.
adjusted <- p.adjust(ft_p_values,
                     method="bonferroni")
for (key in names(adjusted)) {
  print(paste0("Comparison: ", key))
  adjusted_p <- adjusted[[key]]
  print(paste0("  adjusted p value: ", adjusted_p))
  # Use the shared significance threshold instead of a hard-coded 0.05.
  if (adjusted_p < alpha) { print("  *significant") }
}
## [1] "Comparison: hamming_vs_hash"
## [1] " adjusted p value: 4.83259611792869e-11"
## [1] " *significant"
## [1] "Comparison: hamming_vs_streak"
## [1] " adjusted p value: 1"
## [1] "Comparison: hamming_vs_integer"
## [1] " adjusted p value: 2.36747458507527e-38"
## [1] " *significant"
## [1] "Comparison: hamming_vs_integer-symmetric"
## [1] " adjusted p value: 6.82464555680076e-37"
## [1] " *significant"
## [1] "Comparison: hash_vs_streak"
## [1] " adjusted p value: 4.83259611792869e-11"
## [1] " *significant"
## [1] "Comparison: hash_vs_integer"
## [1] " adjusted p value: 4.03183617537378e-11"
## [1] " *significant"
## [1] "Comparison: hash_vs_integer-symmetric"
## [1] " adjusted p value: 3.0335335243374e-10"
## [1] " *significant"
## [1] "Comparison: streak_vs_integer"
## [1] " adjusted p value: 2.36747458507527e-38"
## [1] " *significant"
## [1] "Comparison: streak_vs_integer-symmetric"
## [1] " adjusted p value: 6.82464555680076e-37"
## [1] " *significant"
## [1] "Comparison: integer_vs_integer-symmetric"
## [1] " adjusted p value: 1"
These first two plots assume solutions were found in the final generation of unsuccessful replicates.
exp_prefix <- "cst"
# CST time to solution across all replicates (unsuccessful replicates are
# treated as solving in the final generation; see surrounding text).
p <- ggplot(cst_data, aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
  geom_boxplot() +
  geom_jitter() +
  ggtitle("Changing signal task - time to solution") +
  theme(axis.text.x=element_text(size=8))
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+`; this also fixes the output file name, which previously
# duplicated the prefix ("-time-to-time-to-solution.png") — the DST
# counterpart uses "-time-to-solution.png".
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution.png"),
       plot=p, width=16, height=8)
print(p)
# Same data, shown as means with bootstrapped 95% confidence intervals.
# NOTE(review): stat_summary(fun.y=...) is deprecated in ggplot2 >= 3.3.0
# (use `fun=`); kept for compatibility with the rendering environment.
ggplot(cst_data, aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
  geom_jitter(alpha=0.75) +
  stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
  stat_summary(fun.y = mean, geom = "point", colour = "black") +
  ggtitle("Changing signal task - time to solution") +
  theme(axis.text.x=element_text(size=8))
For a fairer comparison, we look at the generation at which a solution was found for the first 25% of replicates (i.e., the first 50) of each metric that found a solution.
threshold <- 50
metrics <- c("hamming", "hash", "integer", "integer-symmetric", "streak")
# For each metric, rank its CST replicates by the generation at which a
# solution first appeared (ties broken randomly).
# Build the combined frame in one pass with lapply() + do.call(rbind, ...)
# instead of growing a data frame with rbind() inside a loop (which copies
# the accumulator every iteration and relied on a seed frame whose
# placeholder column name, `update_rank`, did not match the `update_ranking`
# column actually produced).
cst_update_rankings <- do.call(
  rbind,
  lapply(metrics, function(metric) {
    cst_metric <- filter(cst_data, matchbin_metric == metric)
    cst_metric <- subset.data.frame(cst_metric,
                                    select=c("score",
                                             "update",
                                             "matchbin_metric",
                                             "solution"))
    cst_metric$update <- as.numeric(cst_metric$update)
    cst_metric$update_ranking <- rank(cst_metric$update, ties.method="random")
    cst_metric
  })
)
exp_prefix <- "cst"
# Box plot: time to solution for only the first `threshold` replicates
# (per metric) to solve the CST.
p <- ggplot(filter(cst_update_rankings, update_ranking <= threshold),
            aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
  geom_jitter(aes(color=matchbin_metric)) +
  geom_boxplot(alpha=0.75) +
  ggtitle(paste0("CST - first ", threshold, " replicates to find a solution")) +
  theme(legend.position="none",
        axis.text.x=element_text(size=8)) +
  ylab("generations until solution")
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution-top-", threshold, "-bp.png"),
       plot=p)
print(p)
## Saving 7 x 5 in image
# Same subset, shown as means with bootstrapped 95% confidence intervals.
# NOTE(review): stat_summary(fun.y=...) is deprecated in ggplot2 >= 3.3.0
# (use `fun=`); kept for compatibility with the rendering environment.
p <- ggplot(filter(cst_update_rankings, update_ranking <= threshold),
            aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
  geom_jitter(alpha=0.75) +
  stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
  stat_summary(fun.y = mean, geom = "point", colour = "black") +
  ggtitle(paste0("CST - first ", threshold, " replicates to find a solution")) +
  ylab("generations until solution") +
  theme(legend.position="none",
        axis.text.x=element_text(size=8))
# Save explicitly with plot=p rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/", exp_prefix, "-time-to-solution-top-", threshold, "-ci.png"),
       plot=p)
print(p)
## Saving 7 x 5 in image
Let’s do a Kruskal-Wallis test to see if there’s a difference in underlying distributions.
# Keep only the first `threshold` replicates (per metric) to solve the task.
cst_first_K_solutions <- filter(cst_update_rankings, update_ranking <= threshold)
# Kruskal-Wallis test: do the time-to-solution distributions differ across
# tag-matching metrics?
kruskal.test(formula = update ~ matchbin_metric, data=cst_first_K_solutions)
##
## Kruskal-Wallis rank sum test
##
## data: update by matchbin_metric
## Kruskal-Wallis chi-squared = 100.44, df = 4, p-value < 2.2e-16
Indeed, there is, so we’ll do a post-hoc pairwise Wilcoxon rank sum test to test for individual differences.
# Post-hoc pairwise Wilcoxon rank-sum tests (corrected with
# mult_comp_method, i.e., Holm) to identify which pairs of metrics differ
# in CST time to solution.
# exact=FALSE forces the normal approximation — presumably because ties in
# `update` make exact p values unavailable; confirm against the data.
pairwise.wilcox.test(x=cst_first_K_solutions$update,
                     g=cst_first_K_solutions$matchbin_metric,
                     p.adjust.method = mult_comp_method,
                     exact=FALSE,
                     conf.int=TRUE)
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: cst_first_K_solutions$update and cst_first_K_solutions$matchbin_metric
##
## hamming integer integer-symmetric hash
## integer 1.9e-10 - - -
## integer-symmetric 1.9e-08 0.057 - -
## hash 2.3e-09 0.049 1.000 -
## streak 1.000 1.4e-10 1.8e-08 1.8e-09
##
## P value adjustment method: holm
updates <- c(10, 30, 50, 100)
# CST score distributions at selected generation snapshots.
plot_data <- filter(cst_fot_data, update %in% updates)
plot_data$update <- factor(plot_data$update)
p_box <- ggplot(plot_data, aes(x=update, y=score, fill=matchbin_metric)) +
  geom_boxplot() +
  ggtitle("CST - score over time")
# Save explicitly with plot= rather than chaining ggsave() onto the plot
# with `+` (fragile/deprecated ggplot2 behavior).
ggsave(paste0("./imgs/", out_dir, "/cst_score_over_time_box.pdf"),
       plot=p_box, width=21, height=8)
print(p_box)
# Mean score trajectory over all generations with a bootstrapped 95% CI
# ribbon. NOTE(review): stat_summary(fun.y=...) is deprecated in
# ggplot2 >= 3.3.0 (use `fun=`); kept for compatibility.
p_line <- ggplot(cst_fot_data, aes(x=update, y=score, color=matchbin_metric, fill=matchbin_metric)) +
  stat_summary(geom = "line", fun.y = mean) +
  stat_summary(geom = "ribbon", fun.data = mean_cl_boot, fun.args=list(conf.int=0.95), alpha = 0.3, color=NA) +
  ggtitle("CST - score over time")
ggsave(paste0("./imgs/", out_dir, "/cst_score_over_time.pdf"),
       plot=p_line, width=21, height=10)
print(p_line)